################################################################################
#### Title: Senders' Data Cleaning
#### Year: 2024
################################################################################

#### Load Packages and Clear Workspace ####
# Start from an empty global environment so objects left over from an earlier
# session cannot leak into this run.
rm(list = ls())

# Folder holding the raw survey export; the relative read below resolves
# against it.
# NOTE(review): absolute, machine-specific path -- the script only runs as-is
# on a computer with this exact folder layout.
setwd("/Users/martinnaunov/Desktop/Desktop - Martin’s MacBook Pro (2)/Persuasion/1_rawdata")



library(dplyr)
library(tidyverse) 
library(stringi)
library(ltm)
library(psych)
library(scales)

#### Data Cleaning ####

# Read the raw export, drop its first two rows (flagged as junk by the
# original author), then re-guess the column types from the remaining rows.
df <- read_csv("senders_raw.csv") %>%
  slice(-(1:2)) %>%
  type_convert() %>%
  # Keep Status == 0 only; this removes test runs completed by researchers.
  filter(Status == 0)

# Classify the issue side for each respondent from the raw favor/oppose items
# (Q*.1) and the follow-up items (Q*.4). Clauses are evaluated top to bottom
# and the first TRUE one wins; respondents matching none get NA.
# Note the Q6 items are keyed the other way round: 1 maps to "trans_cons".
df <- mutate(
  df,
  side = case_when(
    Q4.1 == 1 ~ "imm_lib",
    Q4.1 == 2 ~ "imm_cons",
    Q4.4 == 1 ~ "imm_lib",
    Q4.4 == 2 ~ "imm_cons",
    Q5.1 == 1 ~ "env_lib",
    Q5.1 == 2 ~ "env_cons",
    Q5.4 == 1 ~ "env_lib",
    Q5.4 == 2 ~ "env_cons",
    Q6.1 == 1 ~ "trans_cons",
    Q6.1 == 2 ~ "trans_lib",
    Q6.4 == 1 ~ "trans_cons",
    Q6.4 == 2 ~ "trans_lib"
  ),
  # Store as a factor (levels are the sorted labels that actually occur).
  side = factor(side)
)

# Flag whether the respondent chose to do the writing task: TRUE when any of
# the six write-choice items equals 1, FALSE otherwise. The OR-chain yields NA
# when no item is 1 and at least one is missing; coalesce() turns that into
# FALSE as well.
df <- df %>%
  mutate(
    write_yes = coalesce(
      Q15.4 == 1 | Q16.4 == 1 | Q17.4 == 1 |
        Q18.4 == 1 | Q19.4 == 1 | Q20.4 == 1,
      FALSE
    )
  )


### Rename Variables ###

# Helper: new-name -> raw-name lookup for one numbered item battery, e.g.
# battery_map("nfc", "Q88_", 2) gives c(nfc1 = "Q88_1", nfc2 = "Q88_2").
battery_map <- function(new_stub, old_stub, n_items) {
  idx <- seq_len(n_items)
  setNames(paste0(old_stub, idx), paste0(new_stub, idx))
}

# Lookup of analysis names (vector names) to raw export columns (values).
rename_map <- c(
  continue = "Q2.1",
  respid = "ResponseId",
  # Issue attitudes
  imm_favor = "Q4.1",
  imm_favor_strength = "Q4.2",
  imm_oppose_strength = "Q4.3",
  imm_lean = "Q4.4",
  env_favor = "Q5.1",
  env_favor_strength = "Q5.2",
  env_oppose_strength = "Q5.3",
  env_lean = "Q5.4",
  trans_favor = "Q6.1",
  trans_favor_strength = "Q6.2",
  trans_oppose_strength = "Q6.3",
  trans_lean = "Q6.4",
  # Partisanship
  partyid_5 = "Q7.1",
  repub_strength = "Q7.2",
  demo_strength = "Q7.3",
  party_lean = "Q7.4",
  # Political interest and party ratings
  pol_intr = "Q8.1",
  discuss_pol = "Q8.2",
  affpol_repub = "Q9.1_1",
  affpol_demo = "Q9.1_2",
  # Political knowledge
  pol_know1 = "Q10.1",
  pol_know2 = "Q10.2",
  pol_know3 = "Q10.3",
  pol_know4 = "Q10.4",
  battery_map("pol_know5_", "Q10.5_", 7),
  pol_know_cheat = "Q10.6",
  pol_know_admit = "Q10.7",
  # Multi-item scales
  battery_map("persp_tak", "Q11.1_", 7),
  battery_map("nfc", "Q88_", 18),
  battery_map("self_est", "Q13.1_", 5),
  # Demographics
  edu = "Q14.1",
  female = "Q14.2",
  income = "Q14.3",
  age = "Q14.4",
  battery_map("race", "Q14.5_", 7),
  eligible = "Q101",
  # Issue-specific items and write/no-write choices
  imm_stay_fav = "Q15.2_1",
  imm_write_fav = "Q15.4",
  imm_stay_opp = "Q16.2_1",
  imm_write_opp = "Q16.4",
  trans_bath_fav = "Q17.2_1",
  trans_write_fav = "Q17.4",
  trans_bath_opp = "Q18.2_1",
  trans_write_opp = "Q18.4",
  env_spend_opp = "Q19.2_1",
  env_write_opp = "Q19.4",
  env_spend_fav = "Q20.2_1",
  env_write_fav = "Q20.4",
  # Writing task and follow-ups
  writing_task = "Q21.1",
  time = "Q21.3_Page Submit",
  task_persuade = "Q22.1",
  task_expect = "Q22.2",
  task_enjoy = "Q22.3",
  task_effort = "Q22.4",
  comment_writers = "Q23.1",
  comment_nonwriters = "Q24.1"
)

# Free-text columns are forced to character. Every other renamed column except
# the respondent id is forced to numeric; respid keeps whatever type it has.
text_cols <- c(
  "pol_know_cheat", "writing_task", "comment_writers", "comment_nonwriters"
)
numeric_cols <- setdiff(names(rename_map), c("respid", text_cols))

df <- df %>%
  rename(all_of(rename_map)) %>%
  mutate(
    across(all_of(numeric_cols), as.numeric),
    across(all_of(text_cols), as.character)
  )

# Drop the scaffolding so only df is left behind, as before.
rm(battery_map, rename_map, text_cols, numeric_cols)

# Snapshot of the renamed, type-coerced data taken before any derived variables
# are built, kept so that alternative coding steps can be checked against the
# main dataframe.
# NOTE(review): df2 is not referenced again in the visible part of this script.
df2 <- df

### Partisanship ### 
## Make a seven-point partisanship scale with 1 as strong Democrat and 7 as 
## Strong Republican

# Strong Democrat (selected Democrat (1) in partyid_5  and very
# strong (1) in demo_strength)

# Not Very Strong Democrat (selected Democrat (1) in partyid_5 and 
# not very strong (2) in demo_strength)

# Lean Democrat (selected Independent (3), or no preference (4), or other party
# (5) for partyid_5 and Closer to Democratic (2) in party_lean)

# Pure Independent (selected Independent (3), or no preference (4), or other party
# (5) for partyid_5 and neither (3) in party_lean)

# Lean Republican (selected Independent (3), or no preference (4), or other party
# (5) for partyid_5 and Closer to Republican (1) in party_lean)

# Not Very Strong Republican (selected Republican (2) for partyid_5 and Not Very 
# Strong (2) in repub_strength)

# Strong Republican (selected Republican (2) for partyid_5 and Very Strong (1) in
# repub_strength)
# Seven-point party identification: 1 = strong Democrat ... 7 = strong
# Republican. Only the follow-up items are tested; clauses run top to bottom
# and the first TRUE one wins (a missing follow-up never matches).
# NOTE(review): partyid_5 itself is not checked, so this presumes each
# follow-up was shown only to the matching partyid_5 branch -- confirm against
# the survey flow.
df <- df %>%
  mutate(
    partyid_7 = case_when(
      demo_strength == 1 ~ 1,
      demo_strength == 2 ~ 2,
      party_lean == 2 ~ 3,
      party_lean == 3 ~ 4,
      party_lean == 1 ~ 5,
      repub_strength == 2 ~ 6,
      repub_strength == 1 ~ 7,
      # NA_real_ keeps every outcome a double; mixing NA_integer_ with the
      # double codes above is rejected by case_when() in dplyr < 1.1.0.
      TRUE ~ NA_real_
    )
  )

### Affective Polarization: affpol ###

## Create a new variable/column named affpol by subtracting the results from 
## affpol_demo from those in affpol_repub
## Take the absolute value so this is a general score of affective polarization
# Affective polarization: absolute gap between the two party ratings, so the
# sign (which party is rated higher) is discarded. A respondent missing either
# rating gets NA.
# abs() has no na.rm argument; the former `na.rm = TRUE` was being passed to
# mutate() and only created a junk column literally named `na.rm`.
df <- df %>%
  mutate(
    affpol = abs(affpol_demo - affpol_repub)
  )


### Attitude Strength ### 

## Immigration ## 
# Make a four-point variable for attitude strength named imm_att_strength
# Code as 0 for those who have a neutral opinion. 
# Code as 1 for those who slightly favor or oppose the policy
# Code as 2 for those who favor or oppose the policy but not strongly
# Code as 3 for those who strongly favor or oppose the policy

# Neither Favor nor Disfavor (0)
# If they answered Neither favor nor oppose (3) for imm_favor and 
# Don't lean one way or the other (3) for imm_lean

# Slightly Favor (1)
# If they answered Neither favor nor oppose (3) for imm_favor and Lean toward 
# favoring (1) for imm_lean

# Slightly Oppose (1)
# If they answered Neither favor nor oppose (3) for imm_favor and Lean toward 
# opposing (2) for imm_lean

# Favor not so Strongly (2)
# If they answered Favor (1) for imm_favor and Not so strongly (2) for
# imm_favor_strength

# Oppose not so strongly (2)
# If they answered Oppose (2) for imm_favor and Not so strongly (2) for 
# imm_oppose_strength

# Strongly Favor (3)
# If they answered Favor (1) for imm_favor and Strongly (1) for 
# imm_favor_strength

# Strongly Oppose (3)
# If they answered Oppose (2) for imm_favor and Strongly (1) for
# imm_oppose_strength
# Immigration attitude strength, 0 (neutral) to 3 (strong), ignoring direction.
# The lean item is tested first, then the "not so strongly" answers, then the
# "strongly" answers; the first TRUE clause wins and missing items never match.
df <- df %>%
  mutate(
    imm_att_strength = case_when(
      imm_lean == 3 ~ 0,
      imm_lean == 1 ~ 1,
      imm_lean == 2 ~ 1,
      imm_favor_strength == 2 ~ 2,
      imm_oppose_strength == 2 ~ 2,
      imm_favor_strength == 1 ~ 3,
      imm_oppose_strength == 1 ~ 3,
      # NA_real_ keeps every outcome a double; mixing NA_integer_ with the
      # double codes above is rejected by case_when() in dplyr < 1.1.0.
      TRUE ~ NA_real_
    )
  )

## Environment ## 
# Make a four-point variable for attitude strength named env_att_strength
# Code as 0 for those who have a neutral opinion. 
# Code as 1 for those who slightly favor or oppose the policy
# Code as 2 for those who favor or oppose the policy but not strongly
# Code as 3 for those who strongly favor or oppose the policy

# Neither Favor nor Disfavor (0)
# If they answered Neither favor nor oppose (3) for env_favor and 
# Don't lean one way or the other (3) for env_lean

# Slightly Favor (1)
# If they answered Neither favor nor oppose (3) for env_favor and Lean toward 
# favoring (1) for env_lean

# Slightly Oppose (1)
# If they answered Neither favor nor oppose (3) for env_favor and Lean toward 
# opposing (2) for env_lean

# Favor not so Strongly (2)
# If they answered Favor (1) for env_favor and Not so strongly (2) for
# env_favor_strength

# Oppose not so strongly (2)
# If they answered Oppose (2) for env_favor and Not so strongly (2) for 
# env_oppose_strength

# Strongly Favor (3)
# If they answered Favor (1) for env_favor and Strongly (1) for 
# env_favor_strength

# Strongly Oppose (3)
# If they answered Oppose (2) for env_favor and Strongly (1) for
# env_oppose_strength

# Environment attitude strength, 0 (neutral) to 3 (strong), ignoring direction.
# The lean item is tested first, then the "not so strongly" answers, then the
# "strongly" answers; the first TRUE clause wins and missing items never match.
df <- df %>%
  mutate(
    env_att_strength = case_when(
      env_lean == 3 ~ 0,
      env_lean == 1 ~ 1,
      env_lean == 2 ~ 1,
      env_favor_strength == 2 ~ 2,
      env_oppose_strength == 2 ~ 2,
      env_favor_strength == 1 ~ 3,
      env_oppose_strength == 1 ~ 3,
      # NA_real_ keeps every outcome a double; mixing NA_integer_ with the
      # double codes above is rejected by case_when() in dplyr < 1.1.0.
      TRUE ~ NA_real_
    )
  )

## Transgender ##
# Make a four-point variable for attitude strength named trans_att_strength
# Code as 0 for those who have a neutral opinion. 
# Code as 1 for those who slightly favor or oppose the policy
# Code as 2 for those who favor or oppose the policy but not strongly
# Code as 3 for those who strongly favor or oppose the policy

# Neither Favor nor Disfavor (0)
# If they answered Neither favor nor oppose (3) for trans_favor and 
# Don't lean one way or the other (3) for trans_lean

# Slightly Favor (1)
# If they answered Neither favor nor oppose (3) for trans_favor and Lean toward 
# favoring (1) for trans_lean

# Slightly Oppose (1)
# If they answered Neither favor nor oppose (3) for trans_favor and Lean toward 
# opposing (2) for trans_lean

# Favor not so Strongly (2)
# If they answered Favor (1) for trans_favor and Not so strongly (2) for
# trans_favor_strength

# Oppose not so strongly (2)
# If they answered Oppose (2) for trans_favor and Not so strongly (2) for 
# trans_oppose_strength

# Strongly Favor (3)
# If they answered Favor (1) for trans_favor and Strongly (1) for 
# trans_favor_strength

# Strongly Oppose (3)
# If they answered Oppose (2) for trans_favor and Strongly (1) for
# trans_oppose_strength
# Transgender-policy attitude strength, 0 (neutral) to 3 (strong), ignoring
# direction. The lean item is tested first, then the "not so strongly" answers,
# then the "strongly" answers; the first TRUE clause wins and missing items
# never match.
df <- df %>%
  mutate(
    trans_att_strength = case_when(
      trans_lean == 3 ~ 0,
      trans_lean == 1 ~ 1,
      trans_lean == 2 ~ 1,
      trans_favor_strength == 2 ~ 2,
      trans_oppose_strength == 2 ~ 2,
      trans_favor_strength == 1 ~ 3,
      trans_oppose_strength == 1 ~ 3,
      # NA_real_ keeps every outcome a double; mixing NA_integer_ with the
      # double codes above is rejected by case_when() in dplyr < 1.1.0.
      TRUE ~ NA_real_
    )
  )

### Attitude Strength ###
## Collapse the three issue-specific scores into one general column called
## att_strength.
## pmax() returns the parallel (element-wise) maximum of two or more vectors;
## with na.rm = TRUE missing scores are skipped, so a row is NA only when all
## three scores are missing.
df <- mutate(
  df,
  att_strength = pmax(
    imm_att_strength, env_att_strength, trans_att_strength,
    na.rm = TRUE
  )
)

### Political Interest ###

# Political interest composite.
#   pol_intr      reverse-keyed (1 <-> 5), then mapped onto 0-1
#   discuss_pol   shifted from 1-8 to 0-7, then mapped onto 0-1
#   pol_intr_full row mean of the two rescaled items (NaN if both are missing)
#
# `from` pins the rescaling to the endpoints implied by the recode keys.
# Without it scales::rescale() uses the observed min/max, so scores would shift
# whenever a sample happens to miss an endpoint. The call is namespaced because
# psych also exports a rescale(), and which one wins depends on attach order.
df <- df %>%
  mutate(
    pol_intr = scales::rescale(
      dplyr::recode(
        pol_intr,
        "1" = 5,
        "2" = 4,
        "3" = 3,
        "4" = 2,
        "5" = 1
      ),
      to = c(0, 1),
      from = c(1, 5)
    ),
    discuss_pol = scales::rescale(
      dplyr::recode(
        discuss_pol,
        "1" = 0,
        "2" = 1,
        "3" = 2,
        "4" = 3,
        "5" = 4,
        "6" = 5,
        "7" = 6,
        "8" = 7
      ),
      to = c(0, 1),
      from = c(0, 7)
    )
  ) %>%
  # Separate step so that `.` below is the data with the rescaled items.
  mutate(
    pol_intr_full = rowMeans(
      dplyr::select(., pol_intr, discuss_pol),
      na.rm = TRUE
    )
  )

# Reliability check for the two political-interest items (plain data.frame
# copy, removed again afterwards).
polintr <- as.data.frame(df[, c("pol_intr", "discuss_pol")])

# Cronbach's alpha
psych::alpha(polintr, na.rm = TRUE)

# All correlations should be positive if every item is keyed in the same
# direction.
lowerCor(polintr)

rm(polintr)

### Political Knowledge: pol_know_avg ###

## pol_know1: 
# Do you happen to know what position Kamala Harris held 
# in California before she was a United States senator? 
# Make this a binary variable with 1 for the correct answer (Attorney General) and
# 0 for incorrect answers. 

## pol_know2:Do you happen to know 
# which state passed an abortion law that will be reviewed by the 
# Supreme Court in December? 
# Make this a binary variable with 1 for the correct answer (Mississippi) and 
# 0 for incorrect answers 

## pol_know3: 
# Do you happen to know what position Merrick Garland holds in the federal government?
# Make this a binary variable with 1 for the correct answer (Attorney General) and 
# 0 for incorrect answers

## pol_know4
# Do you happen to know the name of the senator from Georgia who faces a 
# challenge to his or her seat in the upcoming 2022 midterm elections?
# Make this a binary variable with 1 for the correct answer (Raphael Warnock) and 
# 0 for incorrect answers

## pol_know5_1:pol_know5_7
# Do you happen to know which two Democratic senators have stated publicly that 
# they oppose ending the filibuster? (Check two.)
# pol_know5_1 (correct---Kyrsten Sinema)
# pol_know5_2 (correct---Joe Manchin)
# pol_know5_3:pol_know5_7(incorrect---Sherrod Brown, Dianne Feinstein, Tim Kaine,
# Mark Kelly, and Amy Klobuchar respectively)

# Score the five knowledge items as 1 = correct / 0 = incorrect and average
# them into pol_know_avg (mean of the non-missing items).
df <- df %>%
  mutate(
    # Items 1-4: option 1 is the correct answer; any other answer scores 0 and
    # a missing answer stays NA.
    across(c(pol_know1:pol_know4), ~ ifelse(. == 1, 1, 0)),
    pol_know5 = case_when(
      # No box ticked at all: treat the item as unanswered. NA_real_ keeps
      # every outcome a double; NA_integer_ alongside the double codes below
      # is rejected by case_when() in dplyr < 1.1.0.
      is.na(pol_know5_1) &
        is.na(pol_know5_2) &
        is.na(pol_know5_3) &
        is.na(pol_know5_4) &
        is.na(pol_know5_5) &
        is.na(pol_know5_6) &
        is.na(pol_know5_7) ~ NA_real_,
      # Both correct boxes ticked.
      # NOTE(review): incorrect boxes are not inspected, so ticking extra names
      # still scores 1 -- confirm the survey enforced exactly two choices.
      pol_know5_1 == 1 & pol_know5_2 == 1 ~ 1,
      TRUE ~ 0
    )
  ) %>%
  # Separate step so that `.` below is the data with the scored items.
  mutate(
    pol_know_avg = rowMeans(
      dplyr::select(., pol_know1, pol_know2, pol_know3, pol_know4, pol_know5),
      na.rm = TRUE
    )
  )


# Reliability check for the five scored knowledge items (plain data.frame
# copy, removed again afterwards).
pol_know <- as.data.frame(df[, paste0("pol_know", 1:5)])

# Cronbach's alpha
psych::alpha(pol_know, na.rm = TRUE)

# Inter-item correlations
lowerCor(pol_know)

rm(pol_know)


### Perspective Taking: persp_tak ###

## Reverse Code two of the items: 
# persp_tak1: I sometimes find it difficult to see things from the "other guy's" 
# point of view

# persp_tak4: If I'm sure I'm right about something, I don't waste much time 
# listening to other people's arguments.

# Reverse-key items 1 and 4 (1 <-> 5, 2 <-> 4), then average the seven items
# into persp_tak (mean of the non-missing items).
# NOTE: the reverse-keying overwrites the items in place, so run this step only
# once per session.
df <- df %>%
  mutate(
    across(
      c(persp_tak1, persp_tak4),
      ~ recode(., "5" = 1, "4" = 2, "3" = 3, "2" = 4, "1" = 5)
    )
  ) %>%
  mutate(
    # Name the seven items explicitly: a starts_with("persp_tak") prefix match
    # would also pick up the composite itself (or any other column sharing the
    # prefix) once it exists.
    persp_tak = rowMeans(
      dplyr::select(., num_range("persp_tak", 1:7)),
      na.rm = TRUE
    )
  )

# Reliability check for the seven perspective-taking items (plain data.frame
# copy, removed again afterwards).
persp_tak <- as.data.frame(df[, paste0("persp_tak", 1:7)])

# Cronbach's alpha
psych::alpha(persp_tak)

# Inter-item correlations
lowerCor(persp_tak)

rm(persp_tak)

### Need for Cognition: nfc ###

## Reverse code nine of the questions

# nfc3: thinking is not my idea of fun

# nfc4: I would rather do something that requires little thought than something
# that is sure to challenge my abilities

# nfc5: I try to anticipate and avoid situations where there is likely a chance
# that I will have to think in depth about something. 

# nfc7: I only think as hard as I have to 

# nfc8: I prefer to think about small, daily projects to long term ones. 

# nfc9:I like tasks that require little thought once I've learned them. 

# nfc12: learning new ways to think doesn't excite me much

# nfc16: I feel relief rather than satisfaction after completing a task that
# requires a lot of mental effort

# nfc17: It is enough for me that something gets the job done, I don't care how
# or why it works

# Create a new variable called nfc which takes the average of 
# the nfc scores 
# Cronbach's alpha 

# Reverse-key the nine negatively worded items (1 <-> 5, 2 <-> 4), then average
# all eighteen items into nfc (mean of the non-missing items).
# NOTE: the reverse-keying overwrites the items in place, so run this step only
# once per session.
df <- df %>%
  mutate(
    across(
      c(nfc3, nfc4, nfc5, nfc7, nfc8, nfc9, nfc12, nfc16, nfc17),
      ~ recode(., "5" = 1, "4" = 2, "3" = 3, "2" = 4, "1" = 5)
    )
  ) %>%
  mutate(
    # Name the eighteen items explicitly: a starts_with("nfc") prefix match
    # would also pick up the composite itself (or any other column sharing the
    # prefix) once it exists.
    nfc = rowMeans(
      dplyr::select(., num_range("nfc", 1:18)),
      na.rm = TRUE
    )
  )

# Item-level copy of the eighteen need-for-cognition items for the checks
# below. Unlike the other scale checks, this object is left in the workspace.
nfc <- df[, paste0("nfc", 1:18)]

# Inter-item correlations, then Cronbach's alpha
lowerCor(nfc)
psych::alpha(nfc)

### Self-Esteem: self_est ###
## Reverse code two of the questions

# self_est2: At times I think I am no good at all

# self_est4: All in all, I am inclined to feel that I am a failure

# Create a new variable called self_est which takes the average of the self-esteem
# scores
# Reverse-key items 2 and 4 (1 <-> 5, 2 <-> 4), then average the five items
# into self_est (mean of the non-missing items).
# NOTE: the reverse-keying overwrites the items in place, so run this step only
# once per session.
df <- df %>%
  mutate(
    across(
      c(self_est2, self_est4),
      ~ recode(., "5" = 1, "4" = 2, "3" = 3, "2" = 4, "1" = 5)
    )
  ) %>%
  mutate(
    # Name the five items explicitly: a starts_with("self_est") prefix match
    # would also pick up the composite itself (or any other column sharing the
    # prefix) once it exists.
    self_est = rowMeans(
      dplyr::select(., num_range("self_est", 1:5)),
      na.rm = TRUE
    )
  )

# Reliability check for the five self-esteem items (plain data.frame copy,
# removed again afterwards).
self_est <- as.data.frame(df[, paste0("self_est", 1:5)])

# Inter-item correlations
lowerCor(self_est)

# Cronbach's alpha
psych::alpha(self_est)

rm(self_est)

### Female ###
# Binary indicator: 1 when the item equals 2, 0 for any other answer, and NA
# when the item is missing.
df <- mutate(df, female = as.numeric(female == 2))

### Race ###
## Collapse the race variables into one variable named race where the values are 
## as follows. White (1), Black (2), American Indian or Alaska Native (3), 
## Asian (4), Native Hawaiian or Pacific Islander (5), Hispanic or Latino (6), 
## Arab or ME (7)
# Collapse the seven race indicators into a single code. Indicators are tested
# from race7 down to race1 and the first one equal to 1 wins, so a respondent
# with several indicators set is assigned the highest-numbered category;
# respondents with none set get NA.
# NOTE(review): confirm that this precedence for multiple selections is
# intended.
df <- mutate(
  df,
  race = case_when(
    race7 == 1 ~ 7,
    race6 == 1 ~ 6,
    race5 == 1 ~ 5,
    race4 == 1 ~ 4,
    race3 == 1 ~ 3,
    race2 == 1 ~ 2,
    race1 == 1 ~ 1
  )
)

### Education ###
# Recode Education to (1) less than high school, (2) high school, (3) some college
# (3-some college but not degree, 4-AA degree), (4) college (5-Bachelor's Degree), 
# (5) Advanced Degree (Master's (6), Doctoral (7), Professional Degree (8))
# Collapse the eight raw education codes into five levels:
#   1 -> 1, 2 -> 2, 3/4 -> 3, 5 -> 4, 6/7/8 -> 5.
# Codes 1 and 2 (and anything not listed, including NA) pass through unchanged.
df <- mutate(
  df,
  edu = case_when(
    edu %in% c(3, 4) ~ 3,
    edu == 5 ~ 4,
    edu %in% c(6, 7, 8) ~ 5,
    TRUE ~ edu
  )
)

##########
# EXPORT #
##########

# Output folder for the cleaned data, defined once and reused below.
# No trailing slash: file.path() inserts the separator itself, so the former
# trailing "/" produced a doubled "//" in the exported file's path.
# NOTE(review): absolute, machine-specific path.
folder_path <- "/Users/martinnaunov/Desktop/Desktop - Martin’s MacBook Pro (2)/Persuasion/2_workingdata"

# Kept for backward compatibility: the script has always finished with the
# working directory switched to the output folder.
setwd(folder_path)

# Full path of the output file (folder + file name and extension)
file_path <- file.path(folder_path, "senders_w1.csv")

# Export the dataframe to CSV without the row-name column
write.csv(df, file = file_path, row.names = FALSE)

# Prints TRUE if the file is now present on disk
file.exists(file_path)

